ERROR ANALYSIS
In [1]:
!pip install -q torch datasets transformers[torch] spacy[cuda11x] spacy_fastlang lime shap
In [105]:
import pickle
import numpy as np
import pandas as pd
import spacy
import spacy_fastlang
import torch
import shap
from datasets import Dataset, DatasetDict
from scipy.stats import binomtest
from scipy.stats.distributions import loguniform
from sklearn.dummy import DummyClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import (
accuracy_score,
f1_score,
make_scorer,
top_k_accuracy_score,
)
from sklearn.metrics import classification_report
from sklearn.model_selection import (
RandomizedSearchCV,
StratifiedShuffleSplit,
train_test_split,
)
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from tqdm.auto import tqdm
from transformers.pipelines.pt_utils import KeyDataset
# Notebook-wide configuration.
# spacy.require_gpu()  # spaCy GPU use is left disabled
tqdm.pandas()  # register tqdm's progress_* methods on pandas objects
pd.set_option("mode.copy_on_write", True)
pd.set_option("display.max_colwidth", 1000)  # show long review texts in full
In [106]:
# Set PyTorch's float32 matmul precision mode before the model is used.
torch.set_float32_matmul_precision("high")

# Restore the previously saved classifier pipeline from disk.
# NOTE: unpickling can execute arbitrary code; only load a classifier.pkl you created or trust.
with open("classifier.pkl", "rb") as pickle_file:
    classifier = pickle.load(pickle_file)

# Print the architecture of the underlying model as a sanity check.
print(classifier.model)
RobertaForSequenceClassification(
(roberta): RobertaModel(
(embeddings): RobertaEmbeddings(
(word_embeddings): Embedding(50265, 768, padding_idx=1)
(position_embeddings): Embedding(514, 768, padding_idx=1)
(token_type_embeddings): Embedding(1, 768)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(encoder): RobertaEncoder(
(layer): ModuleList(
(0-11): 12 x RobertaLayer(
(attention): RobertaAttention(
(self): RobertaSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): RobertaSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): RobertaIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): RobertaOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
)
)
(classifier): RobertaClassificationHead(
(dense): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(out_proj): Linear(in_features=768, out_features=4, bias=True)
)
)
In [11]:
from datasets import load_from_disk
# Load a previously saved dataset from the "Project2_data" directory.
# NOTE(review): `dataset` is reassigned from the pickled DataFrames a few cells below,
# so this on-disk load looks superseded — confirm before relying on it.
dataset = load_from_disk("Project2_data")
In [107]:
# Read the three pickled splits.
train_df = pd.read_pickle("Project2_train_data.pkl.gz")
test_df = pd.read_pickle("Project2_test_data.pkl.gz")
valid_df = pd.read_pickle("Project2_valid_data.pkl.gz")

# Expose the target and the review under the generic "label" / "text" column names
# that the dataset encoding and the evaluation helper below read.
for split_df in (train_df, test_df, valid_df):
    split_df["label"] = split_df["wine_group"]
    split_df["text"] = split_df["review_text"]
In [108]:
# Wrap the three DataFrames as a DatasetDict and turn the string "label" column
# into an encoded class-label feature on every split.
dataset = DatasetDict(
    {
        "train": Dataset.from_pandas(train_df),
        "valid": Dataset.from_pandas(valid_df),
        "test": Dataset.from_pandas(test_df),
    }
).class_encode_column("label")
# dataset.save_to_disk("lab09_dataset")
Casting to class labels: 0%| | 0/7500 [00:00<?, ? examples/s]
Casting to class labels: 0%| | 0/5000 [00:00<?, ? examples/s]
Casting to class labels: 0%| | 0/2500 [00:00<?, ? examples/s]
In [109]:
def evaluate(pipeline, dataset, batch_size=32):
    """Run a text-classification pipeline over ``dataset["text"]``.

    Parameters
    ----------
    pipeline : callable
        Called as ``pipeline(texts, batch_size=...)``; must yield, per text, either a
        single ``{"label", "score"}`` dict (top-1 output) or a list of such dicts
        (one per class).
    dataset : sequence of rows with a ``"text"`` field
        The examples to classify.
    batch_size : int, default 32
        Batch size forwarded to the pipeline.

    Returns
    -------
    predicted : list of str
        The highest-scoring label for each text.
    scores : numpy.ndarray of shape (n_texts, n_labels)
        Scores per text, columns ordered by the sorted label names seen in the
        pipeline output. A label the pipeline did not report for a text is NaN.
    """
    results = list(
        tqdm(
            # Forward the caller's batch size instead of a hard-coded value.
            pipeline(KeyDataset(dataset, "text"), batch_size=batch_size),
            total=len(dataset),
        )
    )

    # Normalise both output shapes to "list of per-class dicts" for every text.
    per_text = [[item] if isinstance(item, dict) else list(item) for item in results]

    predicted = [
        max(candidates, key=lambda entry: entry["score"])["label"]
        for candidates in per_text
    ]

    # Use every label the pipeline reported, not only those that won a prediction,
    # so the score matrix has a column for each class.
    labels = sorted({entry["label"] for candidates in per_text for entry in candidates})
    score_maps = [
        {entry["label"]: entry["score"] for entry in candidates}
        for candidates in per_text
    ]
    scores = np.array(
        [[by_label.get(label, np.nan) for label in labels] for by_label in score_maps],
        dtype=float,
    ).reshape(len(score_maps), len(labels))  # keeps a 2-D shape for empty input
    return predicted, scores
In [31]:
# Classify the validation reviews so that `predicted` exists on a fresh kernel;
# no earlier cell defines it.
# NOTE(review): assumes the predictions analysed below were made on the validation
# split (they are attached to `valid_df` in the next cell) — confirm.
validation_texts = KeyDataset(dataset["valid"], "text")
predicted = []
for output in tqdm(
    classifier(validation_texts, batch_size=32), total=len(validation_texts)
):
    # The pipeline yields one dict per text for top-1 output, or a list of per-class dicts.
    best = output if isinstance(output, dict) else max(output, key=lambda entry: entry["score"])
    predicted.append(best["label"])

# Distinct predicted class names, in alphabetical order.
labels = sorted(set(predicted))
labels
Out[31]:
['Bold Reds', 'Full-bodied Whites', 'Light-bodied, Crisp Whites', 'Medium to Full-bodied Reds']
In [ ]:
# Attach the model's predicted group to every validation review and preview the result.
valid_df = valid_df.assign(predicted=predicted)
valid_df.head()
In [41]:
# Misclassified validation reviews: the true group differs from the predicted one.
misclassified = valid_df.query("wine_group != predicted")
# Draw up to 30 of them with a fixed seed, so the sampled errors (and the SHAP plots
# built on them) are the same on every run and the cell still works when fewer than
# 30 errors exist.
errors = misclassified.sample(n=min(30, len(misclassified)), random_state=42)
In [42]:
import shap
In [43]:
# Name the SHAP outputs in the model's own class-id order, taken from the model
# config, rather than from the labels that happened to be predicted. This keeps the
# names aligned with the wrapped pipeline's output columns even if a class is never
# predicted or the alphabetical order differs from the id order.
id2label = classifier.model.config.id2label
output_names = [id2label[class_id] for class_id in sorted(id2label)]

explainer = shap.Explainer(
    shap.models.TransformersPipeline(classifier, rescale_to_logits=True),
    output_names=output_names,
)
# Explain every sampled error; row i of `shap_values` corresponds to row i of `errors`.
shap_values = explainer(KeyDataset(Dataset.from_pandas(errors), "review_text"))
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 3%|▎ | 1/30 [00:00<?, ?it/s]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 10%|█ | 3/30 [00:18<01:51, 4.13s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 13%|█▎ | 4/30 [00:26<02:32, 5.86s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 17%|█▋ | 5/30 [00:35<02:49, 6.77s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 20%|██ | 6/30 [00:43<02:54, 7.29s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 23%|██▎ | 7/30 [00:51<02:55, 7.61s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 27%|██▋ | 8/30 [01:00<02:52, 7.86s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 30%|███ | 9/30 [01:08<02:47, 7.99s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 33%|███▎ | 10/30 [01:16<02:42, 8.10s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 37%|███▋ | 11/30 [01:25<02:34, 8.15s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 40%|████ | 12/30 [01:33<02:27, 8.19s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 43%|████▎ | 13/30 [01:41<02:19, 8.22s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 47%|████▋ | 14/30 [01:49<02:11, 8.23s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 50%|█████ | 15/30 [01:58<02:03, 8.24s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 53%|█████▎ | 16/30 [02:06<01:55, 8.26s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 57%|█████▋ | 17/30 [02:14<01:47, 8.25s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 60%|██████ | 18/30 [02:22<01:39, 8.26s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 63%|██████▎ | 19/30 [02:31<01:31, 8.28s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 67%|██████▋ | 20/30 [02:39<01:22, 8.27s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 70%|███████ | 21/30 [02:47<01:14, 8.27s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 73%|███████▎ | 22/30 [02:56<01:06, 8.27s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 77%|███████▋ | 23/30 [03:04<00:57, 8.27s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 80%|████████ | 24/30 [03:12<00:49, 8.28s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 83%|████████▎ | 25/30 [03:20<00:41, 8.28s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 87%|████████▋ | 26/30 [03:29<00:33, 8.28s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 90%|█████████ | 27/30 [03:37<00:24, 8.27s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 93%|█████████▎| 28/30 [03:45<00:16, 8.26s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 97%|█████████▋| 29/30 [03:53<00:08, 8.26s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 100%|██████████| 30/30 [04:02<00:00, 8.27s/it]
0%| | 0/498 [00:00<?, ?it/s]
PartitionExplainer explainer: 31it [04:10, 8.35s/it]
In [44]:
# For each wine group, count how often it is involved in a sampled error, either as
# the true label or as the predicted one. Adding the two tallies with fill_value=0
# keeps groups that appear on only one side (a left merge would drop them or give NaN).
true_counts = errors["wine_group"].value_counts()
predicted_counts = errors["predicted"].value_counts()
c = (
    true_counts.add(predicted_counts, fill_value=0)
    .astype(int)
    .sort_values(ascending=False)
    .rename_axis("wine_group")
    .reset_index(name="error_count")
)
c
Out[44]:
| wine_group | error_count | |
|---|---|---|
| 0 | Medium to Full-bodied Reds | 22 |
| 1 | Bold Reds | 21 |
| 2 | Light-bodied, Crisp Whites | 9 |
| 3 | Full-bodied Whites | 8 |
In [45]:
# Tag every sampled error with its "true group+predicted group" pair,
# then tally how often each pair occurs.
errors["combination"] = errors["wine_group"].str.cat(errors["predicted"], sep="+")
comb_orig = errors["combination"].value_counts().reset_index()
comb_orig
Out[45]:
| combination | count | |
|---|---|---|
| 0 | Medium to Full-bodied Reds+Bold Reds | 11 |
| 1 | Bold Reds+Medium to Full-bodied Reds | 9 |
| 2 | Light-bodied, Crisp Whites+Full-bodied Whites | 4 |
| 3 | Full-bodied Whites+Light-bodied, Crisp Whites | 3 |
| 4 | Medium to Full-bodied Reds+Light-bodied, Crisp Whites | 1 |
| 5 | Bold Reds+Light-bodied, Crisp Whites | 1 |
| 6 | Medium to Full-bodied Reds+Full-bodied Whites | 1 |
In [57]:
# Keep the original validation index as `key_orig` and add `key`, the row's position
# within the sample. `key` is what indexes `shap_values`, which were computed from
# `errors` in this same row order.
# Guarded so that re-running the cell does not fail trying to insert columns that
# already exist.
if "key" not in errors.columns:
    errors = (
        errors.rename_axis("key_orig")
        .reset_index()
        .rename_axis("key")
        .reset_index()
    )
errors.head()
Out[57]:
| key | key_orig | review_text | wine_variant | wine_group | predicted | combination | |
|---|---|---|---|---|---|---|---|
| 0 | 0 | 27473 | Popped and poured. Big fruit on the nose and taste. Medium finish. Very easy to drink and completely enjoyable! | Cabernet Sauvignon/Zinfandel | Bold Reds | Medium to Full-bodied Reds | Bold Reds+Medium to Full-bodied Reds |
| 1 | 1 | 18471 | Great fruit an acidity, this one could go a bit longer but the fruit is so nice right now, it is almost impossible to resist now. | Cabernet Sauvignon/Zinfandel | Bold Reds | Medium to Full-bodied Reds | Bold Reds+Medium to Full-bodied Reds |
| 2 | 2 | 9500 | Some barnyard funk when opened (not corked). This gradually blew off, but the fruit wasn't showing. In the mouth, soft and smooth, but no real structure left. Short finish. | Cabernet Sauvignon/Zinfandel | Bold Reds | Medium to Full-bodied Reds | Bold Reds+Medium to Full-bodied Reds |
| 3 | 3 | 12308 | Great wine with a very bad cork - it is a great bottle for the price ($25) but it is incredibkle how bad corcks came in this vintage. The good is that the wine is not affected by it. | Cabernet Sauvignon/Zinfandel | Bold Reds | Medium to Full-bodied Reds | Bold Reds+Medium to Full-bodied Reds |
| 4 | 4 | 88 | Very smooth with a lite acid finish. Gets better as it breathes. Complimented the Italian sausage and tomato basil sauce. Over fresh tri-color pasta | Cabernet Sauvignon/Zinfandel | Bold Reds | Medium to Full-bodied Reds | Bold Reds+Medium to Full-bodied Reds |
In [71]:
# Sampled errors whose true group is "Medium to Full-bodied Reds" but which were
# predicted as "Bold Reds".
is_comb1 = errors["combination"] == "Medium to Full-bodied Reds+Bold Reds"
comb1 = errors.loc[is_comb1, ["key", "wine_group", "predicted", "review_text"]]
print(f'There are {comb1.shape[0]} mislabeled combinations of "Medium to Full-bodied Reds+Bold Reds"')
comb1.head(2)
There are 11 mislabeled combinations of "Medium to Full-bodied Reds+Bold Reds"
Out[71]:
| key_orig | key | wine_group | predicted | review_text | |
|---|---|---|---|---|---|
| 5 | 22357 | 5 | Medium to Full-bodied Reds | Bold Reds | Powerful. Oak, blueberries and char on the nose. All in refrain on the palate with a light lingering oaky vapor trail. |
| 6 | 9301 | 6 | Medium to Full-bodied Reds | Bold Reds | Hmmm. I didn't have the experience the majority to the tasters here had. This was opened in the morning and I tried it that night. Maybe that had something to do with it. The nose was pleasant, with dark fruits and some richness felt. But the palate is very ripe, almost to the point of being syrupy, and it's hot and bitter on the finish. Way out of balance, without enough richness or fruit on the front palate to fill out the high alcohol. I was very impressed with their blends, too, so this wine surprised me. |
In [74]:
# Render the token-level SHAP text plot for each review in this error group;
# the `key` column holds the row's position in `shap_values`.
for shap_row in comb1["key"]:
    shap.plots.text(shap_values[shap_row])
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Power
ful
.
Oak
,
blue
berries
and
char
on
the nose
.
All in
refrain
on
the palate
with
a
light
lingering
o
aky
vapor
trail
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Hmmm.
I didn't have
the experience the majority
to
the
tasters
here
had
.
This was opened in
the morning and
I tried it that night.
Maybe that had something
to do with it.
The nose was pleasant,
with dark fruits
and
some richness felt.
But the palate is very ripe, almost to the point of being syrupy,
and it's hot and bitter on the finish.
Way out of balance,
without enough
richness
or
fruit on the front
palate to fill out
the high alcohol
.
I was very impressed with their blends,
too,
so this wine surprised me.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
I
wish
I
didn
't
buy
this
wine
.
It
isn
't
worth
the
price
and
is
unex
cept
ional
.
I
do
not
expect
it
to
get
better
with
time
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
F
ruit
was
gone
,
still
a
graceful
elegance
but
nowhere close
to what
it was
a couple
of years ago
.
This
wine
(
not perfectly
well stored
I am
afraid)
is
in
rapid
decline
.
Drink
up
!
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Wonder
ful
fru
ity
nose
.
Both
the
tip
and
finish
are
fru
ity
with
low
t
ann
ins
and
fru
ity
flavors
lingering
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
The
first
day
I
dec
anted
it for
about 30
minutes
,
and
it had
TONS
of red
oak flavors
.
Way
too
much
!
A lot of
the
oak
went away
on the
second
and
third day
.
A lot
more
fruit
came out
,
and
the wine was MUCH
more enjoyable--even
that third day...
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
1
hr
dec
ant
.
expressive
nose
of
red
and
black fruits
,
black
pepper
,
and
herbs
.
Medium
plus
body
of
black
and
red
raspberry
,
c
edar
,
and
pepper
.
Medium
plus
finish
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Good
,
but
p
aled
in
comparison
to
prior
evening
's
97
C
iac
ci
.
M
uted
bou
quet
,
has shown
little
evolution
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
P
aired
this
with
pork
roast
for
Easter
and
what
a
great
match
.
E
leg
ent
,
yet
sweet
with
a
ro
sey
taste
.
Fruity
finish,
you have to drink cold.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Never
get tired
of
drinking
this
wine
.
D
ont
usually
buy
this
much
of one
wine
,
but
for once
I
am
glad
I
did
.
Same
notes
as
before
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
It is really
nothing special about this wine.
I was told they
bought the
juice
or grapes
and
I am
not too
sure they can do
red as
good as
they do
white
.
Overall not
bad wine
,
dark fruits and
a bit dirty,
a bit
of
tann
ins
and
and
spice
.
Yet poor
QPR
is what makes this
wine unfavorable.
In [75]:
# Sampled errors whose true group is "Bold Reds" but which were predicted as
# "Medium to Full-bodied Reds".
is_bold_as_medium = errors["combination"] == "Bold Reds+Medium to Full-bodied Reds"
comb2 = errors.loc[is_bold_as_medium, ["key", "wine_group", "predicted", "review_text"]]
print(f'There are {comb2.shape[0]} mislabeled combinations of "Bold Reds+Medium to Full-bodied Reds"')
comb2.head(2)
There are 9 mislabeled combinations of "Bold Reds+Medium to Full-bodied Reds"
Out[75]:
| key | wine_group | predicted | review_text | |
|---|---|---|---|---|
| 0 | 0 | Bold Reds | Medium to Full-bodied Reds | Popped and poured. Big fruit on the nose and taste. Medium finish. Very easy to drink and completely enjoyable! |
| 1 | 1 | Bold Reds | Medium to Full-bodied Reds | Great fruit an acidity, this one could go a bit longer but the fruit is so nice right now, it is almost impossible to resist now. |
In [76]:
# Render the token-level SHAP text plot for each review in this error group.
# Index `shap_values` through the explicit positional `key` column (as the first
# group's loop does) instead of relying on the DataFrame index coinciding with it.
for key in comb2["key"]:
    shap.plots.text(shap_values[key])
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Po
pped
and
poured
.
Big
fruit
on
the
nose
and
taste
.
Medium
finish
.
Very
easy
to
drink
and
completely enjoyable
!
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Great
fruit
an
acid
ity
,
this one could go a bit longer but
the
fruit
is
so
nice
right
now
,
it
is
almost
impossible
to
resist
now
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Some
barn
yard
funk
when
opened
(
not
corked
).
This
gradually
blew
off
,
but
the
fruit
wasn
't
showing
.
In
the
mouth
,
soft
and
smooth,
but
no
real
structure
left
.
Short
finish
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Great wine with a very bad
c
ork
-
it
is
a
great
bottle
for
the
price
($
25)
but
it
is
incred
ib
k
le
how
bad
cor
cks
came
in
this
vintage
.
The good is that the wine is not affected by it.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Very
smooth
with
a
lite
acid
finish
.
Gets
better
as
it
breat
hes
.
Compl
iment
ed
the
Italian
sausage
and
tomato
basil
sauce
.
Over
fresh
tri
-
color
pasta
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
This wine was presented in my room at
Auber
ge
du
Soleil
in
Rutherford
,
CA
.
It was labeled as
a "reserve
" wine
with the
Auber
ge
label,
but
it was
vinted
and
bottled by Rutherford Hill
.
I was pleasently surprised by the quality of this wine,
especially as it was a "complementary" bottle.
I simply
poured it
through the
Vint
uri
not
expecting
much
,
but I was pleased by
the overall aroma and
finish of this wine
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Drank
this
bottle
before
going
out
for
New
Years
Eve
.
Very
soft
on the palate
,
beautiful
flavor
and
nose with hints of wet
lo
am
.
Dark
crimson
coloring
.
One
of
the
best
wines
I
've ever had
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
This
was
a
beautiful
wine
.
We
drank
it
at
the end
of a
long
evening
of
wine
,
and
it stood out over
all
the
others
.
My
palate
was
well
fried
at
that
point
though
,
so
I
can
't
offer
a
great
tasting
note
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
l
ots
of
sour
cherry
and
pine
,
yet
the
wine
is
both
well
integrated
and
nuanced
in
its
presentation
.
It
seems
that
it
can
benefit
from
6
months
of
cell
aring
,
but
definitely
is
worth tasting
now
.
In [78]:
# Sampled errors whose true group is "Light-bodied, Crisp Whites" but which were
# predicted as "Full-bodied Whites".
is_light_as_full = errors["combination"] == "Light-bodied, Crisp Whites+Full-bodied Whites"
comb2 = errors.loc[is_light_as_full, ["key", "wine_group", "predicted", "review_text"]]
print(f'There are {comb2.shape[0]} mislabeled combinations of "Light-bodied, Crisp Whites+Full-bodied Whites"')
comb2.head(2)
There are 4 mislabeled combinations of "Light-bodied, Crisp Whites+Full-bodied Whites"
Out[78]:
| key | wine_group | predicted | review_text | |
|---|---|---|---|---|
| 19 | 19 | Light-bodied, Crisp Whites | Full-bodied Whites | olden color, honey, apricot, a touch of orange. Sweet, syrapy. Lucious. Could use a touch more acidity. |
| 25 | 25 | Light-bodied, Crisp Whites | Full-bodied Whites | Almost tasted like champagne - which is always a plus. Sweet but not overly sweet, went very well with the shrimp and toasted couscous. Be sure to chill well before serving. |
In [79]:
# Render the token-level SHAP text plot for each review in this error group.
# Index `shap_values` through the explicit positional `key` column (as the first
# group's loop does) instead of relying on the DataFrame index coinciding with it.
for key in comb2["key"]:
    shap.plots.text(shap_values[key])
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
old
en
color
,
honey
,
apr
ic
ot
,
a
touch
of
orange
.
Sweet
,
sy
rap
y
.
Luc
ious
.
Could
use
a
touch
more
acid
ity
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Almost
tasted
like
champagne
-
which
is
always
a
plus
.
Sweet
but
not
overly
sweet
,
went
very
well
with
the
shrimp
and
to
asted
cous
c
ous
.
Be
sure
to
chill
well
before
serving
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Deep
orange
gold
,
mouth
water
ing
acid
,
sweet
,
tang
iness
person
ified
,
and
it
goes
on
and
on
.
Ut
ter
ly
wonderful
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
went
exceptionally
well
with
warmed
goat
cheese
on
my
salad
.
crisp
flavors
,
balanced
to
its
acid
icity
,
decent
length
of
finish
In [150]:
# Sampled errors whose true group is "Full-bodied Whites" but which were predicted
# as "Light-bodied, Crisp Whites". The combination is named once so the filter and
# the printed message cannot disagree (the message previously named the reverse pair).
combination = "Full-bodied Whites+Light-bodied, Crisp Whites"
comb2 = errors.loc[
    errors["combination"] == combination,
    ["key", "wine_group", "predicted", "review_text"],
]
print(f'There are {comb2.shape[0]} mislabeled combinations of "{combination}"')
comb2
There are 3 mislabeled combinations of "Light-bodied, Crisp Whites+Full-bodied Whites"
Out[150]:
| key | wine_group | predicted | review_text | |
|---|---|---|---|---|
| 7 | 7 | Full-bodied Whites | Light-bodied, Crisp Whites | Rich sweet honeysuckle, creamy mint, basil, brown sugar and peach all coming through on the nose. Velvety mouthfeel, nice acidity, good extraction. |
| 11 | 11 | Full-bodied Whites | Light-bodied, Crisp Whites | Luscious, honeysuckle, peach, and apricot on the nose. Quite a full mouth feel but balanced with wonderful acidity and minerality. Lovely wine, rich but not over-the-top. Yum. |
| 17 | 17 | Full-bodied Whites | Light-bodied, Crisp Whites | Nice moderate bright golden color. Nose shows green lemon grass, some herbaceous olive oil action coupled with a light baking spice aroma. The palate shows lots of acid and is extremely young and tightly wound. More citric yellow fruit - lemons, some tart lime and a bit of yellow apple. The finish is highly acidic and needs some time to come into itself. Yet this was a perfect wine to awaken the taste buds before dinner. Nice potential. |
In [151]:
# Render the token-level SHAP text plot for each review in this error group.
# Index `shap_values` through the explicit positional `key` column (as the first
# group's loop does) instead of relying on the DataFrame index coinciding with it.
for key in comb2["key"]:
    shap.plots.text(shap_values[key])
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Rich
sweet
hone
ys
uckle
,
creamy
mint
,
basil
,
brown
sugar
and
peach
all
coming
through
on
the
nose
.
V
elve
ty
mouth
feel
,
nice
acid
ity
,
good
extraction
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
L
us
cious
,
hone
ys
uckle
,
peach
,
and
apr
ic
ot
on
the
nose
.
Quite
a
full
mouth
feel
but
balanced with wonderful
acid
ity
and
miner
ality
.
Lovely
wine
,
rich
but
not
over
-
the
-
top
.
Y
um
.
outputs
Bold Reds
Full-bodied Whites
Light-bodied, Crisp Whites
Medium to Full-bodied Reds
inputs
Nice
moderate
bright
golden
color
.
Nose
shows
green
lemon
grass
,
some
herb
aceous
olive
oil
action
coupled
with
a
light
baking
spice
aroma
.
The
palate
shows
lots
of
acid
and
is
extremely
young
and
tightly
wound
.
More
cit
ric
yellow
fruit
-
le
mons
,
some
tart
lime
and
a
bit
of
yellow
apple
.
The
finish
is
highly
acidic
and
needs
some
time
to
come
into
itself
.
Yet
this
was
a
perfect
wine
to
awaken
the
taste
buds
before
dinner
.
Nice
potential
.
In [ ]: